"""
Convert JSON objects of the form
{
    "attach_id": "7f95093b-125c-4775-8993-3057f4672257",
    "download_url": "https://disc.static.szse.cn/download/disc/disk03/finalpage/2023-05-18/2b520b24-8d3e-4dcd-9350-b7292d59ba3c.PDF?n=%E4%B8%80%E5%BD%AC%E7%A7%91%E6%8A%80%EF%BC%9A%E4%BF%A1%E6%B0%B8%E4%B8%AD%E5%92%8C%E4%BC%9A%E8%AE%A1%E5%B8%88%E4%BA%8B%E5%8A%A1%E6%89%80%EF%BC%88%E7%89%B9%E6%AE%8A%E6%99%AE%E9%80%9A%E5%90%88%E4%BC%99%EF%BC%89%E5%85%B3%E4%BA%8E%E5%AE%81%E6%B3%A2%E4%B8%80%E5%BD%AC%E7%94%B5%E5%AD%90%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8%E4%BB%A5%E8%87%AA%E7%AD%B9%E8%B5%84%E9%87%91%E9%A2%84%E5%85%88%E6%94%AF%E4%BB%98%E5%8F%91%E8%A1%8C%E8%B4%B9%E7%94%A8%E6%83%85%E5%86%B5%E7%9A%84%E9%89%B4%E8%AF%81%E6%8A%A5%E5%91%8A.PDF"
}
into
{
    "attach_id": "7f95093b-125c-4775-8993-3057f4672257",
    "title": "\u4e00\u5f6c\u79d1\u6280\uff1a...",
    "attach_path": "/disc/disk03/finalpage/2023-05-18/2b520b24-8d3e-4dcd-9350-b7292d59ba3c.PDF"
}
where title is the percent-decoded (UTF-8) text that follows "?n=" in download_url, and attach_path is download_url with "?n=" and everything after it removed and the leading "https://disc.static.szse.cn/download" stripped.
"""
import json
from urllib.parse import unquote
# Name of the JSON file that receives the merged list of records.
RESULT_FILE = "result_urls.json"


def _read_json(path):
    """Parse the UTF-8 encoded JSON file at *path* and return its content."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Records that carry a raw ``download_url`` and are converted further down.
download_urls = _read_json("download_urls_only.json")
# Records that are appended to the result without any conversion.
download_urls_with_title = _read_json("download_urls_with_title.json")
# Accumulates every record that ends up in RESULT_FILE.
result_urls = []

def get_title(url):
    """Return the percent-decoded title carried after ``?n=`` in *url*.

    The text following the first ``?n=`` marker is decoded with
    ``urllib.parse.unquote`` (UTF-8 by default). Splitting at the first
    marker keeps the title aligned with ``get_attach_path``, which keeps the
    text before that same marker.

    Args:
        url: Download URL, expected to look like
            ``<file-url>?n=<percent-encoded title>``.

    Returns:
        The decoded title, or an empty string when *url* contains no
        ``?n=`` marker.
    """
    _, marker, encoded_title = url.partition("?n=")
    if not marker:
        # Without a marker there is no title; returning the URL itself would
        # silently store a bogus title in the output.
        return ""
    return unquote(encoded_title)

def get_attach_path(url):
    """Return the server-relative file path contained in *url*.

    Every occurrence of the download host prefix is removed first, then
    everything from the first ``?n=`` marker onwards is dropped.
    """
    download_prefix = "https://disc.static.szse.cn/download"
    without_prefix = url.replace(download_prefix, "")
    path, _, _ = without_prefix.partition("?n=")
    return path

# Convert every raw record: derive the title and the relative path from its
# download URL while keeping the attachment id.
result_urls.extend(
    {
        "attach_id": record["attach_id"],
        "title": get_title(record["download_url"]),
        "attach_path": get_attach_path(record["download_url"]),
    }
    for record in download_urls
)

# The second input list is appended as loaded, after the converted records.
result_urls += download_urls_with_title

print(f"共转换了{len(result_urls)}个URL")

# ensure_ascii=True writes non-ASCII characters as \uXXXX escapes.
with open(RESULT_FILE, "w", encoding="utf-8") as out_file:
    json.dump(result_urls, out_file, ensure_ascii=True, indent=2)

